This notebook compares doublet results calculated on benchmarking datasets to one another, with the primary goal of addressing these questions:
There are several items to bear in mind when interpreting these results:
cxds, as we are using it, does not have a specific
threshold for calling droplets. By contrast, both scrublet
and scDblFinder identify a threshold based on the given
dataset they are processing. This notebook uses a doublet threshold of
>=0.5 for cxds, which may not be
universally suitable (though choosing a universally suitable threshold
is not easy either!).

suppressPackageStartupMessages({
library(SingleCellExperiment)
library(ggplot2)
library(patchwork)
library(caret)
library(UpSetR)
})
# Use the black-and-white ggplot theme for all plots in this notebook
theme_set(theme_bw())
# define threshold used to call cxds
# (cxds itself emits only scores; scores >= this value are called "doublet")
cxds_threshold <- 0.5
# Resolve paths relative to the renv project root so the notebook can be
# rendered from any working directory
module_base <- rprojroot::find_root(rprojroot::is_renv_project)
data_dir <- file.path(module_base, "scratch", "benchmark-datasets")
result_dir <- file.path(module_base, "results", "benchmark-results")
plot_pca_calls <- function(df,
                           color_column,
                           pred_column,
                           title) {
  # Plot the first two PCs colored by singlet/doublet call, re-drawing the
  # doublet points in black on top so they are not hidden by the singlet cloud.
  #
  # df: data frame expected to contain columns PC1, PC2, and `color_column`
  # color_column: unquoted column holding "doublet"/"singlet" labels
  #   (should _not_ be provided as a string)
  # pred_column: currently unused; retained so existing callers keep working
  #   -- TODO(review): confirm it can be removed
  # title: plot title
  ggplot(df) +
    aes(x = PC1,
        y = PC2,
        color = {{color_column}}) +
    geom_point(
      size = 0.75,
      alpha = 0.6
    ) +
    # name the values explicitly so the color assignment does not depend on
    # the alphabetical/factor order of the levels
    scale_color_manual(
      name = "",
      values = c("doublet" = "black", "singlet" = "gray80")
    ) +
    # re-draw doublets on top, fully opaque
    geom_point(
      data = dplyr::filter(df, {{color_column}} == "doublet"),
      color = "black",
      size = 0.75
    ) +
    ggtitle(title)
}
plot_pca_metrics <- function(df, color_column, metric_colors) {
  # Plot the first two PCs colored by confusion-matrix category, re-drawing
  # the false calls (fp/fn) on top of the true calls so they stay visible.
  #
  # df: data frame expected to contain columns PC1, PC2, and `color_column`
  # color_column: unquoted column holding tp/tn/fp/fn labels
  #   (should _not_ be provided as a string)
  # metric_colors: named vector of colors used for coloring tp/tn/fp/fn
  false_calls <- dplyr::filter(df, {{color_column}} %in% c("fp", "fn"))
  ggplot(df, aes(x = PC1, y = PC2, color = {{color_column}})) +
    geom_point(size = 0.75, alpha = 0.6) +
    geom_point(data = false_calls, size = 0.75) +
    scale_color_manual(name = "Call type", values = metric_colors) +
    ggtitle("Consensus class metrics")
}
First, we’ll read in and combine doublet results into a list of data frames for each dataset. We’ll also create new columns for each dataset:

- `consensus_class`, which will be “doublet” if all three methods are doublet, “singlet” if all three methods are singlet, and “ambiguous” if there are any disagreements
- `consensus_call`, which will be “doublet” if all methods predict “doublet,” and “singlet” otherwise
- `confusion_call`, which will classify the consensus call as one of “tp”, “tn”, “fp”, or “fn” (true/false positive/negative) based on `consensus_call`

# find all dataset names to process:
# Find dataset names from the scrublet result files.
# Note: `pattern` is a regular expression, not a glob -- "*_scrublet.tsv"
# would be an invalid/literal leading `*` and `.` matches any character,
# so use an anchored, escaped pattern instead.
dataset_names <- list.files(result_dir, pattern = "_scrublet\\.tsv$") |>
  stringr::str_remove("_scrublet\\.tsv$")
# colors for the confusion-call (tp/tn/fp/fn) PCA plots
confusion_colors <- c(
  "tp" = "lightblue",
  "tn" = "pink",
  "fp" = "blue",
  "fn" = "firebrick2"
)
# Read in data for analysis: one combined data frame per dataset, holding
# per-droplet doublet calls from all three methods, ground truth, and PCA
# coordinates, plus consensus columns derived from the three calls.
doublet_df_list <- dataset_names |>
  purrr::map(
    \(dataset) {
      # input files for this dataset
      scdbl_tsv <- file.path(result_dir, glue::glue("{dataset}_scdblfinder.tsv"))
      scrub_tsv <- file.path(result_dir, glue::glue("{dataset}_scrublet.tsv"))
      sce_file <- file.path(data_dir, dataset, glue::glue("{dataset}_sce.rds"))
      # scDblFinder output also carries the cxds score; call cxds doublets
      # at the notebook-wide `cxds_threshold`
      scdbl_df <- scdbl_tsv |>
        readr::read_tsv(show_col_types = FALSE) |>
        dplyr::select(
          barcodes,
          cxds_score,
          scdbl_score = score,
          scdbl_prediction = class
        ) |>
        # add cxds calls at `cxds_threshold` threshold
        dplyr::mutate(
          cxds_prediction = dplyr::if_else(
            cxds_score >= cxds_threshold,
            "doublet",
            "singlet"
          )
        )
      scrub_df <- readr::read_tsv(scrub_tsv, show_col_types = FALSE)
      # grab ground truth and PCA coordinates
      sce <- readr::read_rds(sce_file)
      dataset_df <- scuttle::makePerCellDF(sce, use.dimred = "PCA") |>
        tibble::rownames_to_column(var = "barcodes") |>
        dplyr::select(barcodes,
                      ground_truth = ground_truth_doublets,
                      PC1 = PCA.1,
                      PC2 = PCA.2) |>
        dplyr::left_join(
          scrub_df,
          by = "barcodes"
        ) |>
        dplyr::left_join(
          scdbl_df,
          by = "barcodes"
        )
      # Add consensus columns:
      # - consensus_call: "doublet" if all three methods are doublet, and "singlet" otherwise
      # - consensus_class: "doublet" if all three methods are doublet, "singlet" if all three
      #   methods are singlet, and "ambiguous" if there are any disagreements
      # - confusion_call: tp/tn/fp/fn based on the `consensus_call` column
      # These comparisons are fully vectorized with `&`, which avoids the
      # per-row overhead of rowwise() and -- importantly -- avoids returning a
      # rowwise-grouped data frame (the previous version never ungrouped).
      # NA predictions still propagate to NA consensus values, as before.
      dataset_df |>
        dplyr::mutate(
          # unanimity indicators (temporary helper columns, dropped below)
          all_doublet = scdbl_prediction == "doublet" &
            scrublet_prediction == "doublet" &
            cxds_prediction == "doublet",
          all_singlet = scdbl_prediction == "singlet" &
            scrublet_prediction == "singlet" &
            cxds_prediction == "singlet",
          consensus_call = dplyr::if_else(all_doublet, "doublet", "singlet"),
          consensus_class = dplyr::case_when(
            all_doublet ~ "doublet",
            all_singlet ~ "singlet",
            .default = "ambiguous"
          ),
          confusion_call = dplyr::case_when(
            consensus_call == "doublet" & ground_truth == "doublet" ~ "tp",
            consensus_call == "singlet" & ground_truth == "singlet" ~ "tn",
            consensus_call == "doublet" & ground_truth == "singlet" ~ "fp",
            consensus_call == "singlet" & ground_truth == "doublet" ~ "fn"
          )
        ) |>
        dplyr::select(-all_doublet, -all_singlet)
    }
  ) |>
  purrr::set_names(dataset_names)
This section contains upset plots that show overlap across doublet calls from each method, displayed for each dataset.
# Draw an upset plot of per-method doublet calls for each dataset.
# Note: iwalk() is used for its printing side effects and returns its input
# invisibly, so `upset_list` is just `doublet_df_list`.
upset_list <- doublet_df_list |>
  purrr::iwalk(
    \(dataset_df, dataset_name) {
      # one vector of doublet barcodes per method
      doublet_barcodes <- list(
        "scDblFinder" = dataset_df$barcodes[dataset_df$scdbl_prediction == "doublet"],
        "scrublet" = dataset_df$barcodes[dataset_df$scrublet_prediction == "doublet"],
        "cxds" = dataset_df$barcodes[dataset_df$cxds_prediction == "doublet"]
      )
      # upset objects must be print()ed explicitly inside walk()
      print(UpSetR::upset(fromList(doublet_barcodes), order.by = "freq"))
      # overlay the dataset name as a plot title
      grid::grid.text(
        dataset_name,
        x = 0.65,
        y = 0.95,
        gp = grid::gpar(fontsize=16)
      )
    }
  )
This section visualizes and evaluates the consensus doublet calls across each dataset.
This section plots the PCA for each dataset, clockwise from the top left:
- scDblFinder singlet/doublet calls
- scrublet singlet/doublet calls
- cxds singlet/doublet calls
- ground truth singlet/doublet calls
- consensus singlet/doublet calls
- consensus call metrics (tp/tn/fp/fn)

For the first five panels, doublets are shown in black and singlets in light gray.
# Build a six-panel PCA figure per dataset: one singlet/doublet panel for each
# method, ground truth, and the consensus call, plus a tp/tn/fp/fn panel.
doublet_df_list |>
  purrr::imap(
    \(df, dataset) {
      # display title -> column name for the five call-based panels
      # (typo fix: "scublet" -> "scrublet" in the panel title)
      pc_color_columns <- c("scDblFinder prediction" = "scdbl_prediction",
                            "scrublet prediction" = "scrublet_prediction",
                            "cxds prediction" = "cxds_prediction",
                            "Ground truth" = "ground_truth",
                            "Consensus call" = "consensus_call")
      # The column names are strings, so convert each to a symbol and inject
      # with `!!`; plot_pca_calls() captures it via `{{ }}`. rlang is
      # namespace-qualified since it is not attached in this notebook.
      pc_plot_list <- pc_color_columns |>
        purrr::imap(
          \(color_column, plot_title) {
            plot_pca_calls(
              df,
              color_column = !!rlang::sym(color_column),
              title = plot_title
            )}
        )
      # sixth panel: confusion-call categories
      # note that the list name isn't used here
      pc_plot_list$metrics <- plot_pca_metrics(
        df,
        confusion_call,
        metric_colors = confusion_colors
      )
      patchwork::wrap_plots(pc_plot_list) +
        plot_annotation(
          glue::glue("PCA for {dataset}"),
          theme = theme(plot.title = element_text(size = 16))) +
        plot_layout(guides = "collect")
    }
  )
$`hm-6k`
$`HMEC-orig-MULTI`
$`pbmc-1B-dm`
$`pdx-MULTI`
This section calculates a confusion matrix and associated statistics on the consensus classes.
# For each dataset: print consensus-class counts and the full confusion-matrix
# report, then collect kappa and balanced accuracy into one summary tibble.
metric_df <- doublet_df_list |>
  purrr::imap(
    \(dataset_df, dataset_name) {
      # section header and consensus-class counts
      glue::glue("======================== {dataset_name} ========================") |>
        print()
      cat("Table of consensus calls:")
      dataset_df$consensus_class |>
        table() |>
        print()
      cat("\n\n")
      # confusion matrix of ground truth vs. the consensus prediction
      truth_by_prediction <- table(
        # truth should be first
        "Truth" = dataset_df$ground_truth,
        "Consensus prediction" = dataset_df$consensus_call
      )
      confusion_result <- caret::confusionMatrix(
        truth_by_prediction,
        positive = "doublet"
      )
      print(confusion_result)
      # one summary row per dataset for the table shown later
      tibble::tibble(
        "Dataset name" = dataset_name,
        "Kappa" = round(confusion_result$overall["Kappa"], 3),
        "Balanced accuracy" = round(confusion_result$byClass["Balanced Accuracy"], 3)
      )
    }
  ) |>
  dplyr::bind_rows()
======================== hm-6k ========================
Table of consensus calls:
ambiguous doublet singlet
283 138 6385
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 138 33
singlet 0 6635
Accuracy : 0.9952
95% CI : (0.9932, 0.9967)
No Information Rate : 0.9797
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.8908
Mcnemar's Test P-Value : 2.54e-08
Sensitivity : 1.00000
Specificity : 0.99505
Pos Pred Value : 0.80702
Neg Pred Value : 1.00000
Prevalence : 0.02028
Detection Rate : 0.02028
Detection Prevalence : 0.02512
Balanced Accuracy : 0.99753
'Positive' Class : doublet
======================== HMEC-orig-MULTI ========================
Table of consensus calls:
ambiguous doublet singlet
3206 641 22579
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 506 3062
singlet 135 22723
Accuracy : 0.879
95% CI : (0.875, 0.8829)
No Information Rate : 0.9757
P-Value [Acc > NIR] : 1
Kappa : 0.2079
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.78939
Specificity : 0.88125
Pos Pred Value : 0.14182
Neg Pred Value : 0.99409
Prevalence : 0.02426
Detection Rate : 0.01915
Detection Prevalence : 0.13502
Balanced Accuracy : 0.83532
'Positive' Class : doublet
======================== pbmc-1B-dm ========================
Table of consensus calls:
ambiguous doublet singlet
487 38 3265
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 26 104
singlet 12 3648
Accuracy : 0.9694
95% CI : (0.9634, 0.9746)
No Information Rate : 0.99
P-Value [Acc > NIR] : 1
Kappa : 0.2986
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.68421
Specificity : 0.97228
Pos Pred Value : 0.20000
Neg Pred Value : 0.99672
Prevalence : 0.01003
Detection Rate : 0.00686
Detection Prevalence : 0.03430
Balanced Accuracy : 0.82825
'Positive' Class : doublet
======================== pdx-MULTI ========================
Table of consensus calls:
ambiguous doublet singlet
2945 4 7347
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 3 1314
singlet 1 8978
Accuracy : 0.8723
95% CI : (0.8657, 0.8787)
No Information Rate : 0.9996
P-Value [Acc > NIR] : 1
Kappa : 0.0038
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.7500000
Specificity : 0.8723280
Pos Pred Value : 0.0022779
Neg Pred Value : 0.9998886
Prevalence : 0.0003885
Detection Rate : 0.0002914
Detection Prevalence : 0.1279138
Balanced Accuracy : 0.8111640
'Positive' Class : doublet
Overall, methods do not have substantial overlap with each other. They each tend to detect different sets of doublets, leading to fairly small sets of consensus doublets. Further, the consensus doublets called by all three methods have some, but not substantial, overlap with the ground truth.
For three out of four datasets, scDblFinder predicts a
much larger number of doublets compared to other methods.
Based on the PCAs, it additionally looks like there are many more
false negatives in the consensus prediction that cxds
appears to capture but other methods do not.
The table below summarizes performance of the “consensus caller”.
Note that, in the benchmarking paper
these datasets were originally analyzed in, hm-6k was
observed to be one of the “easiest” datasets to classify across
methods.
Consistent with that observation, it has the highest kappa
value here.
# display the per-dataset summary metrics (kappa and balanced accuracy)
metric_df
# record the versions of the packages used in this analysis and other environment information
sessionInfo()
R version 4.4.0 (2024-04-24)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices datasets utils methods base
other attached packages:
[1] UpSetR_1.4.0 caret_6.0-94 lattice_0.22-6
[4] patchwork_1.2.0 ggplot2_3.5.1 SingleCellExperiment_1.26.0
[7] SummarizedExperiment_1.34.0 Biobase_2.64.0 GenomicRanges_1.56.0
[10] GenomeInfoDb_1.40.0 IRanges_2.38.0 S4Vectors_0.42.0
[13] BiocGenerics_0.50.0 MatrixGenerics_1.16.0 matrixStats_1.3.0
loaded via a namespace (and not attached):
[1] pROC_1.18.5 gridExtra_2.3 rlang_1.1.3
[4] magrittr_2.0.3 e1071_1.7-14 compiler_4.4.0
[7] DelayedMatrixStats_1.26.0 vctrs_0.6.5 reshape2_1.4.4
[10] stringr_1.5.1 pkgconfig_2.0.3 crayon_1.5.2
[13] XVector_0.44.0 labeling_0.4.3 scuttle_1.14.0
[16] utf8_1.2.4 prodlim_2023.08.28 tzdb_0.4.0
[19] UCSC.utils_1.0.0 bit_4.0.5 purrr_1.0.2
[22] xfun_0.43 beachmat_2.20.0 zlibbioc_1.50.0
[25] jsonlite_1.8.8 recipes_1.0.10 DelayedArray_0.30.1
[28] BiocParallel_1.38.0 parallel_4.4.0 R6_2.5.1
[31] stringi_1.8.4 parallelly_1.37.1 rpart_4.1.23
[34] lubridate_1.9.3 Rcpp_1.0.12 iterators_1.0.14
[37] knitr_1.46 future.apply_1.11.2 readr_2.1.5
[40] Matrix_1.7-0 splines_4.4.0 nnet_7.3-19
[43] timechange_0.3.0 tidyselect_1.2.1 rstudioapi_0.16.0
[46] abind_1.4-5 yaml_2.3.8 timeDate_4032.109
[49] codetools_0.2-20 listenv_0.9.1 tibble_3.2.1
[52] plyr_1.8.9 withr_3.0.0 future_1.33.2
[55] survival_3.5-8 proxy_0.4-27 pillar_1.9.0
[58] BiocManager_1.30.23 renv_1.0.7 foreach_1.5.2
[61] generics_0.1.3 vroom_1.6.5 rprojroot_2.0.4
[64] hms_1.1.3 sparseMatrixStats_1.16.0 munsell_0.5.1
[67] scales_1.3.0 globals_0.16.3 class_7.3-22
[70] glue_1.7.0 tools_4.4.0 data.table_1.15.4
[73] ModelMetrics_1.2.2.2 gower_1.0.1 grid_4.4.0
[76] ipred_0.9-14 colorspace_2.1-0 nlme_3.1-164
[79] GenomeInfoDbData_1.2.12 cli_3.6.2 fansi_1.0.6
[82] S4Arrays_1.4.0 lava_1.8.0 dplyr_1.1.4
[85] gtable_0.3.5 digest_0.6.35 SparseArray_1.4.3
[88] farver_2.1.1 lifecycle_1.0.4 hardhat_1.3.1
[91] httr_1.4.7 bit64_4.0.5 MASS_7.3-60.2